# Load sources, articles, journos
datadir <- "data/masterdata2"
all.articles <- read_tsv(here(datadir, "articles.tsv"))
all.sources <- read_tsv(here(datadir, "sources.tsv"))
sources <- all.sources %>% filter(policy_label_gpt)
articles <- all.articles %>% filter(policy_label_gpt)
authors <- articles %>%
group_by(author_name, elite_undergrad_ivyplus, edu.undergrad, edu.has_postgrad, is_career, field.journo, age_est_2017, gender, race.nonwhite) %>%
summarize(n = n()) %>%
filter(!is.na(author_name))
articles.repna <- articles %>% mutate(
field.journo = replace_na(field.journo, FALSE),
elite_undergrad_ivyplus = replace_na(elite_undergrad_ivyplus, FALSE),
edu.has_postgrad = replace_na(edu.has_postgrad, FALSE),
)
sources.repna <- sources %>% mutate(
field.journo = replace_na(field.journo, FALSE),
elite_undergrad_ivyplus = replace_na(elite_undergrad_ivyplus, FALSE),
edu.has_postgrad = replace_na(edu.has_postgrad, FALSE),
)
nrow(articles)
## [1] 13136
nrow(sources)
## [1] 77454
nrow(authors)
## [1] 1842
articles %>%
group_by(year, source) %>%
summarize(n = n()) %>%
ggplot(aes(x = year, y = n, color = source)) +
geom_line() +
theme_bw() +
scale_x_continuous(breaks = c(2012, 2014, 2016, 2018, 2020, 2022)) +
labs(
x = "Year",
y = "Number of Articles",
color = "Newspaper"
) +
theme(
panel.grid.minor = element_blank()
)
ggsave(here("paper/figures/time_trend_n_articles.png"), width = 6, height = 4)
articles$n %>% hist()
summary(articles$n)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 4.000 6.000 5.896 8.000 31.000
sources %>%
group_by(year) %>%
summarize(n = n(), n_articles = length(unique(filename)), avg_sources = n / n_articles) %>%
ggplot(aes(x = year, y = avg_sources)) +
geom_line() +
scale_x_continuous(breaks = seq(2012, 2022, 2)) +
theme_bw()
sources %>%
group_by(year > 2016) %>%
summarize(n = n(), n_articles = length(unique(filename)), avg_sources = n / n_articles)
## # A tibble: 2 × 4
## `year > 2016` n n_articles avg_sources
## <lgl> <int> <int> <dbl>
## 1 FALSE 17586 3336 5.27
## 2 TRUE 59868 9800 6.11
sources %>%
group_by(source) %>%
summarize(n = n(), n_articles = length(unique(filename)), avg_sources = n / n_articles)
## # A tibble: 6 × 4
## source n n_articles avg_sources
## <chr> <int> <int> <dbl>
## 1 Chicago Tribune 3052 578 5.28
## 2 Los Angeles Times 10173 1796 5.66
## 3 New York Times 28282 4671 6.05
## 4 USA Today 3397 684 4.97
## 5 Wall Street Journal 12081 2286 5.28
## 6 Washington Post 20469 3121 6.56
sources %>%
group_by(source, year) %>%
summarize(n = n(), n_articles = length(unique(filename)), avg_sources = n / n_articles) %>%
ggplot(aes(x = year, y = avg_sources, color = source)) +
geom_line()
# The example articles
# filter(str_detect(filename, "1270322870.xml|1731405273.xml")) %>%
gen.cats <- table(sources$category) %>%
prop.table() %>%
as.data.frame() %>%
rename(Category = "Var1") %>%
mutate(
Freq = paste0(round(100 * Freq, digits = 1), "%")
)
sources$category2 <- case_when(
sources$env_category == "environmental" ~ paste(sources$category, "- Environmental"),
sources$env_category == "fossil fuel" ~ paste(sources$category, "- Fossil Fuel"),
sources$category == "Politician" & sources$pol_party == "Democrat" ~ "Politician - Democrat",
sources$category == "Politician" & sources$pol_party == "Republican" ~ "Politician - Republican",
sources$category == "Politician" & sources$category == "Politician" ~ "Politician - International",
sources$category == "Citizen" ~ "Other",
sources$category == "Advocacy" ~ "Advocacy - Other",
sources$category == "Business" ~ "Business - Other",
TRUE ~ sources$category
)
sub.cats <- table(sources$category2) %>%
prop.table() %>%
as.data.frame() %>%
rename(Category = "Var1") %>%
mutate(
Freq = paste0(round(100 * Freq, digits = 1), "%")
)
rbind(gen.cats, sub.cats) %>% distinct()
## Category Freq
## 1 Academic 10.2%
## 2 Advocacy 21.5%
## 3 Bureaucrat 12.4%
## 4 Business 15%
## 5 International 15.2%
## 6 Media 5.6%
## 7 Other 4.1%
## 8 Politician 16%
## 9 Advocacy - Environmental 13.7%
## 10 Advocacy - Fossil Fuel 0.4%
## 11 Advocacy - Other 7.5%
## 12 Business - Environmental 2%
## 13 Business - Fossil Fuel 4.3%
## 14 Business - Other 8.6%
## 15 Politician - Democrat 9.7%
## 16 Politician - International 2.2%
## 17 Politician - Republican 4.1%
dt <- sources %>%
filter(is.na(gov_category)) %>%
group_by(org_id) %>%
summarize(
n = n(), cfscore = mean(cfscore, na.rm = T), name = first(organization_name), category.slant = first(category.slant)
) %>%
arrange(desc(n)) %>%
filter(!is.na(cfscore)) %>%
head(50) %>%
arrange(n) %>%
mutate(rn = 51 - row_number(), n_size = n + 10)
dt %>% ggplot(aes(x = cfscore, size = sqrt(n), y = n, color = cfscore)) +
theme_bw() +
# scale_y_continuous(limits = c(-1, 0.8)) +
scale_y_log10() +
# scale_y_continuous(trans='reverse') +
scale_x_continuous(limits = c(-1.5, 1.2)) +
scale_color_gradient(low = "blue", high = "red") +
geom_vline(xintercept = 0, lty = "dashed") +
theme(panel.grid = element_blank(), axis.text.y = element_blank()) +
labs(x = "DIME Ideology", y = "") +
geom_text_repel(
data = subset(dt, category.slant != "other" & !(name %in% c("The Associated Press", "Associated Press", "California"))),
aes(x = cfscore, label = name),
direction = "y",
min.segment.length = 999
) +
guides(size = "none", color = "none", fill = "none")
ggsave(here("paper/figures/dime-scatter-100.png"), width = 10, height = 6)
# scratch
dt <- sources %>%
filter(is.na(gov_category)) %>%
group_by(org_id) %>%
summarize(
n = n(), cfscore = mean(cfscore, na.rm = T), name = first(organization_name), category.slant = first(category.slant)
) %>%
arrange(desc(n)) %>%
filter(!is.na(cfscore)) %>%
head(7)
dt
## # A tibble: 7 × 5
## org_id n cfscore name category.slant
## <dbl> <int> <dbl> <chr> <chr>
## 1 3604 1288 0.265 United States of America Democrat
## 2 9187 786 0.287 United States Government Republican
## 3 3487 656 -1.04 Natural Resources Defense Council (NRDC) Environmental
## 4 5505 430 -1.10 Environmental Defense Fund Environmental
## 5 1072 389 1.01 International Energy Agency Advocacy
## 6 10671 384 0.193 American Petroleum Institute Fossil Fuel
## 7 1620 345 -0.960 Sierra Club Environmental
sources %>% filter(org_id %in% dt$org_id) %>% group_by(year, organization_name, org_id, cfscore) %>% summarize(n=n()) %>% ggplot(aes(
x=year, y=n, color=cfscore < 0, lty=organization_name
)) +
geom_smooth(se=F)
pres <- sources %>%
mutate(
pres = case_when(
str_detect(person_name, "Biden") ~ "Biden",
str_detect(person_name, "Obama") ~ "Obama",
str_detect(person_name, "Trump") ~ "Trump",
category.slant %in% c("Democrat", "Republican") ~ category.slant,
TRUE ~ NA
)
)
npy <- sources %>%
group_by(year) %>%
summarize(npy = n())
pres %>%
group_by(year, pres) %>%
filter(!is.na(pres)) %>%
summarize(n = n()) %>%
ggplot(aes(x = year, y = n, color = pres)) +
geom_line() +
theme_bw()
# Normalize by # of sources per year...
pres %>%
group_by(year, pres) %>%
filter(!is.na(pres)) %>%
summarize(n = n()) %>%
left_join(npy) %>%
ggplot(aes(x = year, y = n / npy, color = pres, shape = pres)) +
geom_line() +
geom_point() +
scale_x_continuous(breaks = seq(2012, 2022, 4)) +
theme_bw()
ggarrange(plotlist = list(
authors %>% ggplot(aes(x = age_est_2017)) +
geom_histogram(),
authors %>% ggplot(aes(x = elite_undergrad_ivyplus)) +
geom_bar(stat = "count"),
authors %>% ggplot(aes(x = edu.has_postgrad)) +
geom_bar(stat = "count"),
authors %>% ggplot(aes(x = field.journo)) +
geom_bar(stat = "count"),
authors %>% ggplot(aes(x = gender)) +
geom_bar(stat = "count"),
authors %>% ggplot(aes(x = race.nonwhite)) +
geom_bar(stat = "count")
))
Unit of analysis: unique journalists per year
journo.years <- articles %>%
group_by(author_name, year, elite_undergrad_ivyplus, edu.undergrad, edu.has_postgrad, is_career, field.journo, age_est, gender, race.nonwhite) %>%
summarize(n = n()) %>%
filter(!is.na(author_name))
age.plt <- journo.years %>%
ggplot(aes(x = year, y = as.numeric(age_est > 35))) +
geom_smooth(method = "loess") +
labs(x = "Year", y = "Age > 35")
ivy.plt <- journo.years %>%
ggplot(aes(x = year, y = as.numeric(elite_undergrad_ivyplus))) +
geom_smooth(method = "loess") +
labs(y = "Ivy League", x = "Year")
fj.plt <- journo.years %>%
ggplot(aes(x = year, y = as.numeric(field.journo))) +
labs(y = "Journalism Degree", x = "Year") +
geom_smooth(method = "loess")
pg.plt <- journo.years %>%
ggplot(aes(x = year, y = as.numeric(edu.has_postgrad))) +
labs(y = "Postgraduate Degree", x = "Year") +
geom_smooth(method = "loess")
gender.plt <- journo.years %>%
ggplot(aes(x = year, y = as.numeric(gender == "female"))) +
labs(y = "Female", x = "Year") +
geom_smooth(method = "loess")
race.plt <- journo.years %>%
ggplot(aes(x = year, y = as.numeric(race.nonwhite))) +
labs(y = "Nonwhite", x = "Year") +
geom_smooth(method = "loess")
plts <- list(age.plt, ivy.plt, fj.plt, gender.plt, pg.plt, race.plt) %>%
lapply(function(p) {
p +
theme_bw() +
theme(panel.grid = element_blank()) +
scale_x_continuous(breaks = seq(2012, 2022, 2))
})
ggarrange(plotlist = plts)
ggsave(here("paper/figures/journo-agg-2.png"), width = 10, height = 6)
articles %>%
pivot_longer(cols = c("n_right_dime_other", "n_left_dime_other", "n_rep", "n_dem", "n_env", "n_ff", "n_center_total")) %>%
ggplot(aes(x = date, y = value, color = name)) +
geom_smooth(se = F) +
theme_bw()
sources %>%
group_by(year, category.slant) %>%
summarize(n = n()) %>%
ggplot(aes(x = year, y = n, color = category.slant, lty = category.slant)) +
geom_smooth(se = F) +
theme_bw()
sources_per_year <- sources %>%
group_by(year) %>%
summarize(year.n = n())
sources %>%
group_by(year, category.slant) %>%
summarize(n = n()) %>%
left_join(sources_per_year) %>%
ggplot(aes(x = year, y = n / year.n, color = category.slant, lty = category.slant)) +
geom_line() +
theme_bw()
ggplot(articles, aes(x = ideo.mean.i, fill = bal.at_least_one_both_sides)) +
geom_histogram() +
theme_bw()
ggplot(articles, aes(x = bal.diff_lr_normalized_nonabs, fill = bal.at_least_one_both_sides)) +
geom_histogram() +
theme_bw()
# With loess
slant_overview_plt <- articles %>%
pivot_longer(
cols = c("ideo.mean.i", "ideo.mean.pols", "ideo.mean.i.orgs"),
) %>%
mutate(
name = dplyr::recode(name, ideo.mean.i = "All", "ideo.mean.pols" = "Politicians", "ideo.mean.i.orgs" = "Organizations"),
size = case_when(name == "All" ~ 1.5, TRUE ~ 1),
lty = case_when(name == "All" ~ 1, TRUE ~ 2)
) %>%
ggplot(aes(x = date, y = value, color = name, lty = as.factor(lty))) +
geom_smooth(se = F, method = "loess") +
theme_bw() +
theme(
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
) +
guides(
linetype = "none",
size = "none"
) +
labs(
y = "Mean article slant",
x = "Year",
color = "Slant Type"
)
balance_plt <- articles %>%
pivot_longer(cols = c("balance_all", "balance_ff_env", "balance_rep_dem")) %>%
mutate(
name = dplyr::recode(name, balance_all = "All", balance_ff_env = "Organizations", balance_rep_dem = "Politicians"),
size = case_when(name == "All" ~ 1.5, TRUE ~ 1),
lty = case_when(name == "All" ~ 1, TRUE ~ 2)
) %>%
ggplot(aes(x = date, y = as.numeric(value), color = name, lty = as.factor(lty))) +
geom_smooth(se = F, method = "loess") +
theme_bw() +
theme(
panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
) +
guides(
linetype = "none",
size = "none"
) +
labs(
y = "Proportion of balanced articles",
x = "Year",
color = "Slant Type"
)
ggarrange(plotlist = list(slant_overview_plt, balance_plt), common.legend = T, legend = "bottom")
ggsave(here("paper/figures/slant-balance-overview.png"), width = 8, height = 5)
articles %>% ggplot(aes(x = date, y = as.numeric(bal.diff_lr_normalized > 0))) +
geom_smooth() +
theme_bw()
articles$ideo.sd.i %>% hist
ggplot(articles, aes(x=date, y=ideo.sd.i)) + geom_smooth()
# Really interesting shape, although the magnitude is maybe not as large?
# Hard to interpret move from 0.65SD peak to 0.55 dip
ggarrange(plotlist=list(
ggplot(articles, aes(x=date, y=ideo.sd.orgs)) + geom_smooth(),
ggplot(articles, aes(x=date, y=ideo.sd.pols)) + geom_smooth()
))
# Mostly driven by orgs...
ggplot(articles, aes(x=date, y=ideo.sd.i, color=source)) + geom_smooth(se=F)
# Wow that's kinda interesting for sure
articles %>% ggplot(aes(x = date, y = ideo.mean.i, color = source)) +
geom_smooth(se = F, method = "loess") +
labs(title = "Article slant by average organization DIME score")
articles %>% ggplot(aes(x = date, y = ideo.mean.i.orgs, color = source)) +
geom_smooth(se = F, method = "loess")
articles %>% ggplot(aes(x = date, y = ideo.mean.pols, color = source)) +
geom_smooth(se = F, method = "loess")
make_fmla <- function(y, covariates, festring = "| year + source") {
covariate_str <- paste0(covariates, collapse = "+")
as.formula(paste(y, "~", covariate_str, festring))
}
make_models <- function(df, yvars, covariates, festring = "| year + source", cluster = "author_name") {
lapply(yvars, \(y) {
feols(make_fmla(y, covariates, festring), data = df, cluster = cluster)
})
}
non_edu_covars <- c(
"(age_est_2017 > 30)",
"gender",
"race.nonwhite"
)
j_covars <- c(
"elite_undergrad_ivyplus",
"edu.has_postgrad",
"field.journo",
non_edu_covars
)
models <- c(
make_models(articles, c("n", "n_unique_source_category"), j_covars),
make_models(articles, c("balance_all", "ideo.mean.i", "ideo.sd.i"), c(j_covars, "n"))
)
etable(models)
## model 1 model 2
## Dependent Var.: n n_unique_source_category
##
## elite_undergrad_ivyplusTRUE 0.3623* (0.1578) 0.1872* (0.0771)
## edu.has_postgradTRUE -0.1035 (0.1365) 0.0128 (0.0684)
## field.journoTRUE -0.1710 (0.1317) -0.0382 (0.0658)
## age_est_2017>30TRUE -0.1442 (0.1783) 0.0804 (0.1049)
## gendermale 0.0880 (0.1281) -0.0423 (0.0706)
## race.nonwhiteTRUE 0.1511 (0.1690) -0.0275 (0.0765)
## n
## Fixed-Effects: ---------------- ------------------------
## year Yes Yes
## source Yes Yes
## ___________________________ ________________ ________________________
## S.E.: Clustered by: author_name by: author_name
## Observations 9,349 9,349
## R2 0.05600 0.04279
## Within R2 0.00621 0.00653
##
## model 3 model 4
## Dependent Var.: balance_all ideo.mean.i
##
## elite_undergrad_ivyplusTRUE 0.1335*** (0.0262) 0.0888*** (0.0193)
## edu.has_postgradTRUE -0.0285 (0.0255) -0.0223 (0.0188)
## field.journoTRUE 0.1060*** (0.0276) 0.0627** (0.0221)
## age_est_2017>30TRUE 0.0345 (0.0249) 0.0518** (0.0180)
## gendermale -0.0153 (0.0226) -0.0122 (0.0174)
## race.nonwhiteTRUE -0.0773. (0.0421) -0.0716* (0.0303)
## n 0.0500*** (0.0028) 0.0076*** (0.0021)
## Fixed-Effects: ------------------ ------------------
## year Yes Yes
## source Yes Yes
## ___________________________ __________________ __________________
## S.E.: Clustered by: author_name by: author_name
## Observations 8,449 8,636
## R2 0.11423 0.07208
## Within R2 0.10020 0.01644
##
## model 5
## Dependent Var.: ideo.sd.i
##
## elite_undergrad_ivyplusTRUE 0.0764*** (0.0212)
## edu.has_postgradTRUE -0.0128 (0.0197)
## field.journoTRUE 0.0505** (0.0176)
## age_est_2017>30TRUE 0.0258 (0.0182)
## gendermale -0.0477** (0.0174)
## race.nonwhiteTRUE -0.0643*** (0.0187)
## n 0.0054* (0.0023)
## Fixed-Effects: -------------------
## year Yes
## source Yes
## ___________________________ ___________________
## S.E.: Clustered by: author_name
## Observations 7,231
## R2 0.04577
## Within R2 0.02405
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
etable(c(
make_models(articles.repna, c("balance_all", "ideo.mean.i", "ideo.sd.i"), c("elite_undergrad_ivyplus*gender", "n"))
))
## model 1 model 2
## Dependent Var.: balance_all ideo.mean.i
##
## elite_undergrad_ivyplusTRUE 0.1743*** (0.0380) 0.1064*** (0.0311)
## gendermale 0.0471 (0.0287) 0.0274 (0.0234)
## n 0.0510*** (0.0025) 0.0081*** (0.0021)
## elite_undergrad_ivyplusTRUE x gendermale -0.1454** (0.0463) -0.0652. (0.0378)
## Fixed-Effects: ------------------ ------------------
## year Yes Yes
## source Yes Yes
## ________________________________________ __________________ __________________
## S.E.: Clustered by: author_name by: author_name
## Observations 10,713 11,024
## R2 0.10376 0.06133
## Within R2 0.09276 0.00788
##
## model 3
## Dependent Var.: ideo.sd.i
##
## elite_undergrad_ivyplusTRUE 0.1233*** (0.0286)
## gendermale 0.0052 (0.0167)
## n 0.0062** (0.0019)
## elite_undergrad_ivyplusTRUE x gendermale -0.1039** (0.0345)
## Fixed-Effects: ------------------
## year Yes
## source Yes
## ________________________________________ __________________
## S.E.: Clustered by: author_name
## Observations 9,152
## R2 0.03601
## Within R2 0.01858
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Basically same results with sources as the unit of analysis
c(
make_models(sources, c("cfscore"), c(j_covars)),
make_models(sources.repna, c("cfscore"), c(j_covars))
) %>% etable()
## model 1 model 2
## Dependent Var.: cfscore cfscore
##
## elite_undergrad_ivyplusTRUE 0.0948*** (0.0256) 0.1010*** (0.0244)
## edu.has_postgradTRUE -0.0307 (0.0239) -0.0199 (0.0230)
## field.journoTRUE 0.0209 (0.0316) 0.0352 (0.0282)
## age_est_2017>30TRUE 0.0701*** (0.0206) 0.0699*** (0.0196)
## gendermale -0.0113 (0.0217) -0.0171 (0.0208)
## race.nonwhiteTRUE -0.0396 (0.0414) -0.0257 (0.0394)
## Fixed-Effects: ------------------ ------------------
## year Yes Yes
## source Yes Yes
## ___________________________ __________________ __________________
## S.E.: Clustered by: author_name by: author_name
## Observations 18,459 21,225
## R2 0.03510 0.03238
## Within R2 0.00452 0.00450
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
j_covars2 <- c(
"elite_undergrad_ivyplus",
"edu.has_postgrad",
non_edu_covars
)
models.alt.nj <- c(
make_models(articles, c("n", "n_unique_source_category"), j_covars2),
make_models(articles, c("balance_all", "ideo.mean.i"), c(j_covars2, "n"))
)
etable(models.alt.nj)
## model 1 model 2
## Dependent Var.: n n_unique_source_category
##
## elite_undergrad_ivyplusTRUE 0.2944* (0.1437) 0.1481* (0.0752)
## edu.has_postgradTRUE -0.0584 (0.1247) 0.0230 (0.0664)
## age_est_2017>30TRUE -0.0852 (0.1756) 0.0791 (0.1009)
## gendermale -0.0309 (0.1245) -0.0731 (0.0659)
## race.nonwhiteTRUE 0.0873 (0.1662) -0.0194 (0.0746)
## n
## Fixed-Effects: ---------------- ------------------------
## year Yes Yes
## source Yes Yes
## ___________________________ ________________ ________________________
## S.E.: Clustered by: author_name by: author_name
## Observations 10,612 10,612
## R2 0.05368 0.04222
## Within R2 0.00286 0.00434
##
## model 3 model 4
## Dependent Var.: balance_all ideo.mean.i
##
## elite_undergrad_ivyplusTRUE 0.0787** (0.0272) 0.0595** (0.0194)
## edu.has_postgradTRUE -0.0378 (0.0270) -0.0344. (0.0193)
## age_est_2017>30TRUE 0.0274 (0.0264) 0.0470* (0.0186)
## gendermale -0.0215 (0.0255) -0.0049 (0.0191)
## race.nonwhiteTRUE -0.0703. (0.0411) -0.0748** (0.0285)
## n 0.0505*** (0.0026) 0.0069*** (0.0021)
## Fixed-Effects: ------------------ ------------------
## year Yes Yes
## source Yes Yes
## ___________________________ __________________ __________________
## S.E.: Clustered by: author_name by: author_name
## Observations 9,550 9,771
## R2 0.10337 0.06549
## Within R2 0.09119 0.01200
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
articles2 <- articles %>% mutate(
field.journo = replace_na(field.journo, FALSE),
elite_undergrad_ivyplus = replace_na(elite_undergrad_ivyplus, FALSE),
edu.has_postgrad = replace_na(edu.has_postgrad, FALSE),
)
models.alt.repna <- c(
make_models(articles2, c("n", "n_unique_source_category"), j_covars),
make_models(articles2, c("balance_all", "ideo.mean.i"), c(j_covars, "n"))
)
models.alt.repna.nofe <- c(
make_models(articles2, c("n", "n_unique_source_category"), j_covars, festring = "| year"),
make_models(articles2, c("balance_all", "ideo.mean.i"), c(j_covars, "n"), festring = "| year")
)
etable(models.alt.repna)
## model 1 model 2
## Dependent Var.: n n_unique_source_category
##
## elite_undergrad_ivyplusTRUE 0.2891. (0.1488) 0.1619* (0.0708)
## edu.has_postgradTRUE -0.0737 (0.1255) 0.0255 (0.0615)
## field.journoTRUE -0.1119 (0.1211) -0.0018 (0.0633)
## age_est_2017>30TRUE -0.0114 (0.1673) 0.0959 (0.0927)
## gendermale -0.0273 (0.1206) -0.0733 (0.0636)
## race.nonwhiteTRUE 0.0902 (0.1598) -0.0173 (0.0714)
## n
## Fixed-Effects: ---------------- ------------------------
## year Yes Yes
## source Yes Yes
## ___________________________ ________________ ________________________
## S.E.: Clustered by: author_name by: author_name
## Observations 11,139 11,139
## R2 0.05525 0.04316
## Within R2 0.00345 0.00520
##
## model 3 model 4
## Dependent Var.: balance_all ideo.mean.i
##
## elite_undergrad_ivyplusTRUE 0.1173*** (0.0263) 0.0810*** (0.0193)
## edu.has_postgradTRUE -0.0142 (0.0255) -0.0148 (0.0184)
## field.journoTRUE 0.1082*** (0.0250) 0.0614** (0.0208)
## age_est_2017>30TRUE 0.0276 (0.0237) 0.0507** (0.0167)
## gendermale -0.0267 (0.0234) -0.0126 (0.0176)
## race.nonwhiteTRUE -0.0563 (0.0390) -0.0670* (0.0275)
## n 0.0513*** (0.0026) 0.0074*** (0.0021)
## Fixed-Effects: ------------------ ------------------
## year Yes Yes
## source Yes Yes
## ___________________________ __________________ __________________
## S.E.: Clustered by: author_name by: author_name
## Observations 9,977 10,243
## R2 0.11047 0.06560
## Within R2 0.09869 0.01402
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
all.models <- c(models, models.alt.nj, models.alt.repna, models.alt.repna.nofe)
plot_models <- function(models, title) {
mtypes <- c("Standard", "No Education Field", "Replace NAs with Defaults", "No Newspaper Fixed Effect")
ggcoefplot(as.list(models)) +
coord_flip() +
scale_color_discrete(labels = mtypes) +
scale_shape_discrete(labels = mtypes) +
labs(title = title) +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
) +
guides(fill = "none")
}
ggarrange(
plotlist = list(
plot_models(all.models[seq(1, length(all.models), 4)], title = "Number of Sources"),
plot_models(all.models[seq(2, length(all.models), 4)], title = "Number of Unique Source Types"),
plot_models(all.models[seq(3, length(all.models), 4)], title = "Balance"),
plot_models(all.models[seq(4, length(all.models), 4)], title = "Slant")
),
common.legend = T,
legend = "bottom"
)
ggsave(here("paper/figures/coefplot-robust.png"), width = 12, height = 15)
etable(models.alt.repna, tex = TRUE, digits = 2, fitstat = c("n"))
\begingroup
\centering
\begin{tabular}{lcccc}
\tabularnewline \midrule \midrule
Dependent Variables: & n & n\_unique\_source\_category & balance\_all & ideo.mean.i\\
Model: & (1) & (2) & (3) & (4)\\
\midrule
\emph{Variables}\\
elite\_undergrad\_ivyplusTRUE & 0.29$^{*}$ & 0.16$^{**}$ & 0.12$^{***}$ & 0.08$^{***}$\\
& (0.15) & (0.07) & (0.03) & (0.02)\\
edu.has\_postgradTRUE & -0.07 & 0.03 & -0.01 & -0.01\\
& (0.13) & (0.06) & (0.03) & (0.02)\\
field.journoTRUE & -0.11 & -0.002 & 0.11$^{***}$ & 0.06$^{***}$\\
& (0.12) & (0.06) & (0.03) & (0.02)\\
age\_est\_2017>30TRUE & -0.01 & 0.10 & 0.03 & 0.05$^{***}$\\
& (0.17) & (0.09) & (0.02) & (0.02)\\
gendermale & -0.03 & -0.07 & -0.03 & -0.01\\
& (0.12) & (0.06) & (0.02) & (0.02)\\
race.nonwhiteTRUE & 0.09 & -0.02 & -0.06 & -0.07$^{**}$\\
& (0.16) & (0.07) & (0.04) & (0.03)\\
n & & & 0.05$^{***}$ & 0.007$^{***}$\\
& & & (0.003) & (0.002)\\
\midrule
\emph{Fixed-effects}\\
year & Yes & Yes & Yes & Yes\\
source & Yes & Yes & Yes & Yes\\
\midrule
\emph{Fit statistics}\\
Observations & 11,139 & 11,139 & 9,977 & 10,243\\
\midrule \midrule
\multicolumn{5}{l}{\emph{Clustered (author\_name) standard-errors in parentheses}}\\
\multicolumn{5}{l}{\emph{Signif. Codes: ***: 0.01, **: 0.05, *: 0.1}}\\
\end{tabular}
\par\endgroup
ggcoefplot(as.list(c(models[1], models[2]))) +
# scale_x_discrete(limits = rev, labels = rev(c("Elite Undergrad", "Postgrad", "Journalism Degree", "Age > 30", "Male", "Non White", "Number of Sources"))) +
# scale_color_discrete(labels=c('Article Slant', 'Article Balance')) +
# scale_shape_discrete(labels=c('Article Slant', 'Article Balance')) +
coord_flip() +
labs(title = "") +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
) +
guides(fill = "none")
ggcoefplot(as.list(c(models[3], models[4]))) +
# scale_x_discrete(limits = rev, labels = rev(c("Elite Undergrad", "Postgrad", "Journalism Degree", "Age > 30", "Male", "Non White", "Number of Sources"))) +
# scale_color_discrete(labels=c('Article Slant', 'Article Balance')) +
# scale_shape_discrete(labels=c('Article Slant', 'Article Balance')) +
coord_flip() +
labs(title = "") +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
) +
guides(fill = "none")
ggsave(here("paper/figures/main-coefplot.png"), width = 6, height = 7)
vars <- c(
"gender",
"race.nonwhite",
"elite_undergrad_ivyplus",
"edu.has_postgrad",
"field.journo"
)
est_with_age_thresh <- function(thresh) {
var <- paste0("(age_est_2017 > ", thresh, ")")
models <- make_models(articles, c("ideo.mean.i", "balance_all"), c(vars, var, "n"))
ideo.age <- models[[1]]$coeftable[6, ]
balance.age <- models[[2]]$coeftable[6, ]
res <- as.data.frame(rbind(ideo.age, balance.age))
res$yvar <- c("ideo.mean.i", "balance_all")
res$thresh <- thresh
res
}
res <- bind_rows(lapply(seq(20, 50), est_with_age_thresh))
res %>% ggplot(aes(x = thresh, y = Estimate, color = yvar, ymin = Estimate - 1.96 * `Std. Error`, ymax = Estimate + 1.96 * `Std. Error`)) +
geom_hline(yintercept = 0, lty = 2) +
geom_point() +
geom_errorbar(position = position_dodge()) +
theme_bw()
ggsave(here("paper/figures/age_robustness.png"), width = 8, height = 6)
ts <- seq(0, 0.95, 0.05)
ms <- sapply(ts, \(t) paste0("bal.diff_lr_normalized>", t))
tmodels <- make_models(articles2, ms, c(j_covars))
coefs <- lapply(ts, \(t) {
df <- as.data.frame(tmodels[[t * 10 + 1]]$coeftable)
df$t <- t
df$x <- rownames(df)
df
}) %>% bind_rows()
ggplot(coefs, aes(x = t, y = `Estimate`, ymin = `Estimate` - 1.96 * `Std. Error`, ymax = `Estimate` + 1.96 * `Std. Error`, color = `Pr(>|t|)` < 0.05)) +
geom_point() +
geom_errorbar(width = 0) +
geom_hline(yintercept = 0) +
theme_bw() +
facet_wrap(~x) +
labs()
# Journalism vaguely associated with higher diff
# Older less associated
# Ivyleaguers are more likely to be slightly biased but not super biased
# Same for ninwhites
# No gender effect
# I guess the other thing we should look at is whether its a left or right imbalance...
ts <- seq(-0.95, 0.95, 0.1)
ms <- sapply(ts, \(t) paste0("bal.diff_lr_normalized_nonabs", ifelse(t < 0, " < ", " > "), t))
feols(bal.diff_lr_normalized_nonabs < -0.5 ~ elite_undergrad_ivyplus, data = articles2)
## OLS estimation, Dep. Var.: bal.diff_lr_normalized_nonabs < -0.5
## Observations: 13,136
## Standard-errors: IID
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.024322 0.001560 15.59480 < 2.2e-16 ***
## elite_undergrad_ivyplusTRUE -0.006350 0.002767 -2.29462 0.021771 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.147644 Adj. R2: 3.246e-4
tmodels <- make_models(articles2, ms, c(j_covars))
coefs <- lapply(seq(1, length(ts)), \(t) {
df <- as.data.frame(tmodels[[t]]$coeftable)
df$t <- ts[[t]]
df$x <- rownames(df)
df
}) %>% bind_rows()
ggplot(coefs, aes(x = t, y = `Estimate`, ymin = `Estimate` - 1.96 * `Std. Error`, ymax = `Estimate` + 1.96 * `Std. Error`, color = `Pr(>|t|)` < 0.05)) +
geom_vline(xintercept = 0, lty = 2) +
geom_point() +
geom_errorbar(width = 0) +
geom_hline(yintercept = 0) +
theme_bw() +
scale_x_reverse() +
scale_color_manual(values = c("#888888", "cyan3")) +
facet_wrap(~x) +
labs()
# So this tells us that... Ivy league Js and older Js are more likely to write
# articles that include more right leaning sources.. but not a LOT more just a bit more
# This comes from the estimates closer to 0 as the threshold moves down
# Older J's are also less likely to write articles with more left leaning sources
# Nonwhites are almost more likely to write aticles with a couple more left leaning sources (not statsig though)
# Okay and then another possibility we can look at is basically
# who is more likely to write an article that is actually unbalanced but looks balanced
make_models(articles2, c(
"bal.diff_lr_normalized > 0.25 & bal.at_least_one_both_sides",
"abs(ideo.mean.i) > 0.25 & bal.at_least_one_both_sides"
), c(j_covars))
## [[1]]
## OLS estimation
## Dep. Var.: bal.diff_lr_normalized > 0.25 & bal.at_least_one_both_sides
## Observations: 11,139
## Fixed-effects: year: 11, source: 6
## Standard-errors: Clustered (author_name)
## Estimate Std. Error t value Pr(>|t|)
## elite_undergrad_ivyplusTRUE 0.041877 0.015923 2.629916 0.0086471 **
## edu.has_postgradTRUE -0.010532 0.014763 -0.713420 0.4757211
## field.journoTRUE 0.052485 0.017470 3.004324 0.0027155 **
## age_est_2017 > 30TRUE -0.000146 0.017357 -0.008400 0.9932991
## gendermale -0.008573 0.014303 -0.599356 0.5490457
## race.nonwhiteTRUE -0.014844 0.019459 -0.762844 0.4457026
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.350077 Adj. R2: 0.010026
## Within R2: 0.005839
##
## [[2]]
## OLS estimation
## Dep. Var.: abs(ideo.mean.i) > 0.25 & bal.at_least_one_both_sides
## Observations: 10,422
## Fixed-effects: year: 11, source: 6
## Standard-errors: Clustered (author_name)
## Estimate Std. Error t value Pr(>|t|)
## elite_undergrad_ivyplusTRUE 0.067097 0.016263 4.125648 3.9561e-05 ***
## edu.has_postgradTRUE -0.004148 0.015356 -0.270097 7.8713e-01
## field.journoTRUE 0.060465 0.016996 3.557649 3.8902e-04 ***
## age_est_2017 > 30TRUE 0.011341 0.016768 0.676339 4.9896e-01
## gendermale -0.010632 0.014772 -0.719717 4.7184e-01
## race.nonwhiteTRUE -0.033952 0.021383 -1.587821 1.1259e-01
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.40864 Adj. R2: 0.012834
## Within R2: 0.008105
# Conditional on writing a biased article, elite and journos are more likely
# to include an opposing view. this could mean they are more sophisitcated essentially?
make_models(articles2 %>% filter(bal.diff_lr_normalized > 0), c(
"bal.at_least_one_both_sides"
), c(j_covars))
## [[1]]
## OLS estimation, Dep. Var.: bal.at_least_one_both_sides
## Observations: 8,807
## Fixed-effects: year: 11, source: 6
## Standard-errors: Clustered (author_name)
## Estimate Std. Error t value Pr(>|t|)
## elite_undergrad_ivyplusTRUE 0.110242 0.028819 3.825271 0.00013808 ***
## edu.has_postgradTRUE -0.014989 0.027862 -0.537957 0.59071706
## field.journoTRUE 0.090676 0.026395 3.435335 0.00061423 ***
## age_est_2017 > 30TRUE 0.043879 0.028358 1.547326 0.12207662
## gendermale -0.029213 0.027101 -1.077940 0.28130044
## race.nonwhiteTRUE -0.052514 0.038582 -1.361096 0.17376623
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.47676 Adj. R2: 0.028732
## Within R2: 0.017153
make_models(articles2 %>% filter(bal.diff_lr_normalized == 0), c(
"!is.na(bal.at_least_one_both_sides)"
), c(j_covars))
## [[1]]
## OLS estimation, Dep. Var.: !is.na(bal.at_least_one_both_sides)
## Observations: 2,332
## Fixed-effects: year: 11, source: 6
## Standard-errors: Clustered (author_name)
## Estimate Std. Error t value Pr(>|t|)
## elite_undergrad_ivyplusTRUE 0.282633 0.050936 5.548803 4.2194e-08 ***
## edu.has_postgradTRUE 0.015394 0.043008 0.357921 7.2052e-01
## field.journoTRUE 0.181502 0.043759 4.147724 3.8126e-05 ***
## age_est_2017 > 30TRUE 0.069850 0.044404 1.573059 1.1620e-01
## gendermale -0.041185 0.040233 -1.023663 3.0638e-01
## race.nonwhiteTRUE 0.011522 0.047440 0.242885 8.0817e-01
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.47712 Adj. R2: 0.081138
## Within R2: 0.067293
ggplot(articles2, aes(x = bal.diff_lr_normalized_nonabs, fill = bal.at_least_one_both_sides)) +
geom_histogram() +
facet_wrap(~elite_undergrad_ivyplus) +
scale_x_reverse()
# Yeah so the ivy league ppl basically are more likely to include both sides
# Reverse isn't true - conditioning on including both sides doesn't increase likelihood of slant
make_models(articles2 %>% filter(bal.at_least_one_both_sides), c(
"bal.diff_lr_normalized > 0.25"
), c(j_covars))
## [[1]]
## OLS estimation, Dep. Var.: bal.diff_lr_normalized > 0.25
## Observations: 4,480
## Fixed-effects: year: 11, source: 6
## Standard-errors: Clustered (author_name)
## Estimate Std. Error t value Pr(>|t|)
## elite_undergrad_ivyplusTRUE -0.024561 0.026565 -0.924568 0.35554
## edu.has_postgradTRUE -0.018263 0.020376 -0.896296 0.37043
## field.journoTRUE 0.034185 0.028846 1.185056 0.23643
## age_est_2017 > 30TRUE -0.048022 0.029278 -1.640241 0.10145
## gendermale -0.006272 0.020454 -0.306661 0.75920
## race.nonwhiteTRUE 0.006778 0.028647 0.236613 0.81303
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.475843 Adj. R2: 0.013472
## Within R2: 0.00354
author.sources <- sources %>%
group_by(author_name, org_id) %>%
summarize(
n = n(),
)
author.sources.t <- sources %>%
group_by(author_name) %>%
summarize(total.n = n(), ideo.mean.i = mean(cfscore, na.rm = T))
a.s <- author.sources %>%
left_join(author.sources.t) %>%
mutate(
s = n / total.n * 100
)
a.s
## # A tibble: 32,366 × 6
## # Groups: author_name [1,843]
## author_name org_id n total.n ideo.mean.i s
## <chr> <dbl> <int> <int> <dbl> <dbl>
## 1 abcarian, robin 9187 1 2 0.484 50
## 2 abcarian, robin NA 1 2 0.484 50
## 3 abderholden, frank 3450 1 9 -0.0976 11.1
## 4 abderholden, frank 9231 1 9 -0.0976 11.1
## 5 abderholden, frank 10050 4 9 -0.0976 44.4
## 6 abderholden, frank 14376 1 9 -0.0976 11.1
## 7 abderholden, frank 16242 1 9 -0.0976 11.1
## 8 abderholden, frank NA 1 9 -0.0976 11.1
## 9 abdi latif dahir 4557 1 2 NaN 50
## 10 abdi latif dahir NA 1 2 NaN 50
## # ℹ 32,356 more rows
library(hhi)
library(purrr)
compute_hhi <- function(df) {
author_name <- df$author_name
hhi <- sum(df$s^2)
data.frame(author_name, hhi)
}
df_hhi <- a.s %>%
group_split(author_name, .keep = TRUE) %>%
map(compute_hhi) %>%
bind_rows() %>%
distinct()
nrow(df_hhi)
## [1] 1843
df_hhi$hhi %>% hist()
df_hhi$hhi %>% summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 137.1 1072.7 1836.7 2368.9 3055.6 10000.0
authors <- authors %>%
left_join(df_hhi) %>%
left_join(a.s)
etable(
# Ivy league, postgrads, but also younger Js have more diverse sources
c(lapply(c("hhi", "ideo.mean.i"), \(y) {
feols(make_fmla(y, j_covars, ""), data = authors)
}),
list(feols(make_fmla('cfscore', j_covars, ""), data = sources))
)
)
## model 1 model 2
## Dependent Var.: hhi ideo.mean.i
##
## Constant 2,223.3*** (91.05) -0.4262*** (0.0560)
## elite_undergrad_ivyplusTRUE -389.1*** (85.04) 0.0472 (0.0519)
## edu.has_postgradTRUE -171.3* (67.55) 0.0328 (0.0412)
## field.journoTRUE 55.22 (69.98) 0.0659 (0.0434)
## age_est_2017>30TRUE -188.5** (66.16) 0.1430*** (0.0402)
## gendermale 89.19 (63.76) 0.1759*** (0.0396)
## race.nonwhiteTRUE -141.3. (81.11) -0.0050 (0.0507)
## ___________________________ __________________ ___________________
## S.E. type IID IID
## Observations 1,877 1,070
## R2 0.02386 0.03657
## Adj. R2 0.02073 0.03113
##
## model 3
## Dependent Var.: cfscore
##
## Constant -0.2554*** (0.0215)
## elite_undergrad_ivyplusTRUE 0.0701*** (0.0152)
## edu.has_postgradTRUE -0.0022 (0.0127)
## field.journoTRUE 0.0347* (0.0158)
## age_est_2017>30TRUE 0.1305*** (0.0143)
## gendermale -0.0448*** (0.0126)
## race.nonwhiteTRUE -0.0543** (0.0206)
## ___________________________ ___________________
## S.E. type IID
## Observations 18,459
## R2 0.00687
## Adj. R2 0.00655
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The idea behind this DiD specification is that Trump’s presidency “activated” latent slant. We test whether journalists who are demographieally more likely to believe in the “participant” press altered their coverage after Trump’s presidency compared to their counterparts.
Parallel trends holds, suggesting that these two groups of journalists did not behave differently before Trump was elected. But after, the younger and less educated journalists slanted their coverage leftward more than the older and more educated ones did.
make_did <- function(df, refyear = 2016) {
iq.model <- feols(
ideo.mean.i ~ field.journo + age_est_2017 + elite_undergrad_ivyplus + gender + race.nonwhite,
data = df
)
df$pred_ideo <- predict(iq.model, newdata = df)
df$treat <- df$pred_ideo < quantile(df$pred_ideo, na.rm = T, 0.5)
#
summary(iq.model)
df %>% ggplot(aes(x = age_est_2017, fill = treat)) +
geom_histogram()
df %>% ggplot(aes(x = field.journo, fill = treat)) +
geom_bar(stat = "count")
df %>% ggplot(aes(x = elite_undergrad_ivyplus, fill = treat)) +
facet_wrap(~field.journo) +
geom_bar(stat = "count")
df %>% ggplot(aes(x = elite_undergrad_ivyplus, fill = treat)) +
facet_wrap(~race.nonwhite) +
geom_bar(stat = "count")
raw.plt <- df %>%
filter(!is.na(treat)) %>%
ggplot(aes(x = date, y = as.numeric(ideo.mean.i), color = treat, fill = treat)) +
geom_smooth(se = T, method = "loess") +
geom_vline(xintercept = as.Date("2016/01/01"), lty = "dashed") +
theme_bw() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
) +
labs(
x = "Year",
y = "Mean article slant",
color = "Predicted Type"
) +
guides(
fill = "none"
) +
scale_color_hue(label = c("Neutral", "Participant"))
raw.plt.b <- df %>%
filter(!is.na(treat)) %>%
ggplot(aes(x = date, y = as.numeric(balance_all), color = treat, fill = treat)) +
geom_smooth(se = T, method = "loess") +
geom_vline(xintercept = as.Date("2016/01/01"), lty = "dashed") +
theme_bw() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
) +
labs(
x = "Year",
y = "Article Balance",
color = "Predicted Type"
) +
guides(
fill = "none"
) +
scale_color_hue(label = c("Neutral", "Participant"))
raw.plt.c <- df %>%
filter(!is.na(treat)) %>%
ggplot(aes(x = date, y = bal.diff_lr_normalized, color = treat, fill = treat)) +
geom_smooth(se = T, method = "loess") +
geom_vline(xintercept = as.Date("2016/01/01"), lty = "dashed") +
theme_bw() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
) +
labs(
x = "Year",
y = "Article Balance (Diff LR Normalized)",
color = "Predicted Type"
) +
guides(
fill = "none"
) +
scale_color_hue(label = c("Neutral", "Participant"))
raw.plt.d <- df %>%
filter(!is.na(treat)) %>%
ggplot(aes(x = date, y = n, color = treat, fill = treat)) +
geom_smooth(se = T, method = "loess") +
geom_vline(xintercept = as.Date("2016/01/01"), lty = "dashed") +
theme_bw() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
) +
labs(
x = "Year",
y = "Number of Sources",
color = "Predicted Type"
) +
guides(
fill = "none"
) +
scale_color_hue(label = c("Neutral", "Participant"))
raw.plt.e <- df %>%
filter(!is.na(treat)) %>%
ggplot(aes(x = date, y = n_unique_source_category, color = treat, fill = treat)) +
geom_smooth(se = T, method = "loess") +
geom_vline(xintercept = as.Date("2016/01/01"), lty = "dashed") +
theme_bw() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
) +
labs(
x = "Year",
y = "Number of Source Types",
color = "Predicted Type"
) +
guides(
fill = "none"
) +
scale_color_hue(label = c("Neutral", "Participant"))
df <- df %>% mutate(
es.treat = case_when(
treat == 0 ~ 0,
treat == 1 & year < refyear ~ 0,
TRUE ~ 1
)
)
event_study_model <- feols(
ideo.mean.i ~ i(year, es.treat, ref = refyear) | year + author_name, df
)
event_study_model_balance <- feols(
as.numeric(balance_all) ~ i(year, es.treat, ref = refyear) | year + author_name, df
)
twfe <- feols(ideo.mean.i ~ (year > refyear) * treat | year + author_name, df)
list(
"raw_trends_plot" = raw.plt,
"article_balance_plot" = raw.plt.b,
"article_balance_plot2" = raw.plt.c,
"n_plot" = raw.plt.d,
"n_cat_plot" = raw.plt.e,
"event_study_model" = event_study_model,
"event_study_model_balance" = event_study_model_balance,
"twfe_model" = twfe
)
}
did <- make_did(articles, 2016)
did$raw_trends_plot
ggsave(here("paper/figures/did-raw-trends.png"), width = 6, height = 4)
ggarrange(plotlist = list(
did$n_plot,
did$n_cat_plot,
did$article_balance_plot,
did$article_balance_plot2
), common.legend = T)
library(ggfixest)
ggiplot(did$event_study_model) + labs(
title = "",
y = "Effect on Article Slant",
x = "Year"
)
# Event study plot
ggsave(here("paper/figures/did-event-study-slant.png"), width = 8, height = 5)
ggiplot(did$event_study_model_balance) + labs(
title = "",
y = "Effect on Article Balance",
x = "Year"
)
ggsave(here("paper/figures/did-event-study-balance.png"), width = 8, height = 5)
# TWFE
etable(did$twfe)
## did$twfe
## Dependent Var.: ideo.mean.i
##
## year>2016TRUE x treatTRUE -0.0533 (0.0302)
## Fixed-Effects: ----------------
## year Yes
## author_name Yes
## _________________________ ________________
## S.E.: Clustered by: year
## Observations 8,703
## R2 0.23417
## Within R2 0.00027
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
We can also run a DiD where the control group is pre-2016 conservative journalists. It suggests that previously liberal journalists actually became more conservative (or that previously conservative journalists became more liberal) during Trump’s presidency.
I’m not exactly sure what this shows. The results are consistent with an alternative explanation that the most liberal journalists in the 2012-2016 period are just followers of presidential cues - the liberal cohort returns to liberalness after Biden comes into office.
authors_pre2017 <- articles %>%
filter(year < 2017) %>%
group_by(author_name) %>%
summarize(
ideo = mean(ideo.mean.i, na.rm = T)
) %>%
filter(!is.na(ideo))
authors_pre2017$ideo %>% summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.47000 -0.51534 -0.25272 -0.24424 -0.02143 1.19392
articles.2 <- articles %>%
left_join(authors_pre2017) %>%
mutate(
ideo_median = case_when(
ideo > median(articles$ideo.mean.i, na.rm = T) ~ "Conservative",
ideo <= median(articles$ideo.mean.i, na.rm = T) ~ "Liberal",
TRUE ~ "New"
),
treat = case_when(
ideo_median == "Liberal" ~ TRUE,
ideo_median == "Conservative" ~ FALSE,
TRUE ~ NA
)
)
articles.2 %>%
filter(ideo_median != "New") %>%
ggplot(aes(x = date, y = ideo.mean.i, color = ideo_median)) +
geom_smooth(se = T, method = "loess") +
geom_vline(xintercept = as.Date("2016/01/01"), lty = "dashed")
# TWFE
feols(ideo.mean.i ~ post2017 * treat | year + author_name, data = articles.2)
## OLS estimation, Dep. Var.: ideo.mean.i
## Observations: 6,478
## Fixed-effects: year: 11, author_name: 603
## Standard-errors: Clustered (year)
## Estimate Std. Error t value Pr(>|t|)
## post2017TRUE:treatTRUE 0.228178 0.04137 5.51558 0.00025614 ***
## ... 2 variables were removed because of collinearity (post2017TRUE and treatTRUE)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.431888 Adj. R2: 0.153967
## Within R2: 0.008122
feols(balance_all ~ post2017 * treat | year + author_name, data = articles.2)
## OLS estimation, Dep. Var.: balance_all
## Observations: 6,357
## Fixed-effects: year: 11, author_name: 575
## Standard-errors: Clustered (year)
## Estimate Std. Error t value Pr(>|t|)
## post2017TRUE:treatTRUE 0.151388 0.04368 3.46584 0.0060632 **
## ... 2 variables were removed because of collinearity (post2017TRUE and treatTRUE)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.444967 Adj. R2: 0.127735
## Within R2: 0.003368
# Event study
feols(ideo.mean.i ~ i(year, treat, ref = 2016) | year + author, articles.2) %>% coefplot()
make_did2 <- function(df) {
iq.model <- feols(
ideo.mean.i ~ field.journo + age_est_2017 + elite_undergrad_ivyplus + gender + race.nonwhite,
data = df
)
df$pred_ideo <- predict(iq.model, newdata = df)
df$treat1 <- df$pred_ideo < quantile(df$pred_ideo, na.rm = T, 0.2)
df$treat2 <- df$pred_ideo > quantile(df$pred_ideo, na.rm = T, 0.8)
df$treat <- ifelse(df$treat1, "Most Liberal", ifelse(df$treat2, "Most Conservative", "Control"))
raw.plt <- df %>%
filter(!is.na(treat)) %>%
ggplot(aes(x = date, y = as.numeric(ideo.mean.i), color = treat, fill = treat)) +
geom_smooth(se = T, method = "loess") +
geom_vline(xintercept = as.Date("2016/01/01"), lty = "dashed") +
theme_bw() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
) +
labs(
x = "Year",
y = "Mean article slant",
color = "Predicted Type"
) +
guides(
fill = "none"
) #+
# scale_color_hue(label = c("Neutral", "Participant"))
raw.plt.b <- df %>%
filter(!is.na(treat)) %>%
ggplot(aes(x = date, y = as.numeric(balance_all), color = treat, fill = treat)) +
geom_smooth(se = T, method = "loess") +
geom_vline(xintercept = as.Date("2016/01/01"), lty = "dashed") +
theme_bw() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
) +
labs(
x = "Year",
y = "Article Balance",
color = "Predicted Type"
) +
guides(
fill = "none"
)
# scale_color_hue(label = c("Neutral", "Participant"))
event_study_model <- feols(
ideo.mean.i ~ i(year, treat, ref = 2015) | year + author_name, df
)
event_study_model_balance <- feols(
as.numeric(balance_all) ~ i(year, treat, ref = 2016) | year + author_name, df
)
twfe <- feols(ideo.mean.i ~ (year > 2015) * treat | year + author_name, df)
list(
"raw_trends_plot" = raw.plt,
"article_balance_plot" = raw.plt.b,
"event_study_model" = event_study_model,
"event_study_model_balance" = event_study_model_balance,
"twfe_model" = twfe
)
}
did <- make_did2(articles)
did$raw_trends_plot
did$article_balance_plot
# Event study plot
did$event_study_model
## OLS estimation, Dep. Var.: ideo.mean.i
## Observations: 8,703
## Fixed-effects: year: 11, author_name: 858
## Standard-errors: Clustered (year)
## Estimate Std. Error t value Pr(>|t|)
## year::2012:treat::Control -0.016307 0.051576 -0.316184 0.758364
## year::2012:treat::Most Conservative -0.143528 0.067946 -2.112374 0.060799
## year::2013:treat::Control 0.189849 0.096324 1.970953 0.077022
## year::2013:treat::Most Conservative -0.024784 0.080583 -0.307556 0.764730
## year::2014:treat::Control -0.096048 0.053158 -1.806843 0.100921
## year::2014:treat::Most Conservative -0.200043 0.064215 -3.115188 0.010963
## year::2016:treat::Control -0.728755 42766.307803 -0.000017 0.999987
## year::2016:treat::Most Conservative -0.684842 42766.287709 -0.000016 0.999988
##
## year::2012:treat::Control
## year::2012:treat::Most Conservative .
## year::2013:treat::Control .
## year::2013:treat::Most Conservative
## year::2014:treat::Control
## year::2014:treat::Most Conservative *
## year::2016:treat::Control
## year::2016:treat::Most Conservative
## ... 13 coefficients remaining (display them with summary() or use argument n)
## ... 9 variables were removed because of collinearity (year::2012:treat::Most Liberal, year::2013:treat::Most Liberal and 7 others [full set in $collin.var])
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.423671 Adj. R2: 0.150296
## Within R2: 0.003974
df <- articles
m.post <- feols(
ideo.mean.i ~ field.journo + age_est_2017 + elite_undergrad_ivyplus + gender + race.nonwhite | year + source,
data = df %>% filter(year > 2016)
)
m.pre <- feols(
ideo.mean.i ~ field.journo + age_est_2017 + elite_undergrad_ivyplus + gender + race.nonwhite | year + source,
data = df %>% filter(year <= 2016)
)
m.all <- feols(
ideo.mean.i ~ field.journo + age_est_2017 + elite_undergrad_ivyplus + gender + race.nonwhite | year + source,
data = df
)
m.all.nofe <- feols(
ideo.mean.i ~ field.journo + age_est_2017 + elite_undergrad_ivyplus + gender + race.nonwhite,
data = df
)
etable(list(m.pre, m.post, m.all, m.all.nofe))
## model 1 model 2
## Dependent Var.: ideo.mean.i ideo.mean.i
##
## field.journoTRUE 0.0667 (0.0328) 0.0642** (0.0137)
## age_est_2017 -0.0012 (0.0016) 0.0018 (0.0012)
## elite_undergrad_ivyplusTRUE 0.0197 (0.0304) 0.1071* (0.0278)
## gendermale -0.0554 (0.0559) -0.0065 (0.0192)
## race.nonwhiteTRUE -0.0800. (0.0342) -0.0607* (0.0232)
## Constant
## Fixed-Effects: ----------------- -----------------
## year Yes Yes
## source Yes Yes
## ___________________________ _________________ _________________
## S.E. type by: year by: year
## Observations 1,740 6,963
## R2 0.05382 0.07361
## Within R2 0.01137 0.01499
##
## model 3 model 4
## Dependent Var.: ideo.mean.i ideo.mean.i
##
## field.journoTRUE 0.0657*** (0.0132) 0.0755*** (0.0127)
## age_est_2017 0.0014 (0.0010) 0.0015** (0.0005)
## elite_undergrad_ivyplusTRUE 0.0927** (0.0223) 0.0610*** (0.0123)
## gendermale -0.0167 (0.0219) -0.0306** (0.0110)
## race.nonwhiteTRUE -0.0663** (0.0198) -0.0789*** (0.0164)
## Constant -0.3174*** (0.0214)
## Fixed-Effects: ------------------ -------------------
## year Yes No
## source Yes No
## ___________________________ __________________ ___________________
## S.E. type by: year IID
## Observations 8,703 8,703
## R2 0.06780 0.01054
## Within R2 0.01230 --
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Could do articles about trump vs articles not about trump?
How many articles do we have journalist education for?
articles.with.author <- articles %>% filter(!is.na(author))
articles.with.author.cfscore <- articles.with.author %>% filter(!is.na(ideo.mean.i))
nrow(articles.with.author)
## [1] 12348
nrow(articles.with.author.cfscore)
## [1] 11314
table(!is.na(articles.with.author$edu.undergrad)) %>% prop.table()
##
## FALSE TRUE
## 0.1187237 0.8812763
table(!is.na(articles.with.author$age_est)) %>% prop.table()
##
## FALSE TRUE
## 0.08317136 0.91682864
with(
articles.with.author.cfscore,
table(!is.na(age_est) & !is.na(elite_undergrad_ivyplus) & !is.na(edu.has_postgrad) & !is.na(field.journo) & !is.na(gender) & !is.na(race.nonwhite))
)
##
## FALSE TRUE
## 2678 8636
nrow(articles.with.author.cfscore %>% filter(is.na(gender)))
## [1] 290
nrow(articles.with.author.cfscore %>% filter(is.na(race.nonwhite)))
## [1] 96
nrow(articles.with.author.cfscore %>% filter(is.na(field.journo)))
## [1] 2462
nrow(articles.with.author.cfscore %>% filter(is.na(edu.has_postgrad)))
## [1] 826
nrow(articles.with.author.cfscore %>% filter(is.na(elite_undergrad_ivyplus)))
## [1] 1297
nrow(articles.with.author.cfscore %>% filter(is.na(age_est)))
## [1] 908
articles.with.author %>%
filter(
is.na(age_est) | is.na(elite_undergrad_ivyplus) | is.na(edu.has_postgrad) | is.na(field.journo) | is.na(gender) | is.na(race.nonwhite)
) %>%
group_by(
author_name, edu.undergrad, edu.field, edu.grad_year, edu.has_postgrad, exp.year_start, gender, race.nonwhite
) %>%
summarize(n = n(), newspapers = paste0(unique(source), collapse = ",")) %>%
arrange(desc(n)) %>%
write_tsv("manual_01-07-25.tsv")
How many articles do we have journalist age for?
How many sources do we have DIME scores for?
Proportion of all sources:
table(sources$cfscore.impute %>% is.na, useNA = "ifany") %>% prop.table()
##
## FALSE TRUE
## 0.5452656 0.4547344
nrow(sources %>% filter(!is.na(cfscore))) / nrow(sources)
## [1] 0.3082604
table(
!is.na(sources$cfscore),
sources$category.slant
)
##
## Academic Advocacy Bureaucrat Business Democrat Environmental
## FALSE 7506 2981 9635 3647 3658 7638
## TRUE 0 2384 0 2894 7549 4265
##
## Fossil Fuel International Media Other Politician Republican
## FALSE 1180 6829 4318 3955 1578 653
## TRUE 2422 46 0 695 0 3621
Proportion of non government organizations:
n_eligible <- sources %>%
filter(tolower(category.slant) %in% c("advocacy", "environmental", "fossil fuel", "business", "democrat", "republican")) %>%
nrow()
# Excludes bureaucrat, media, other
nrow(sources %>% filter(!is.na(cfscore))) / n_eligible
## [1] 0.5566539
articles %>% ggplot(aes(x = ideo.mean.i, color = source, fill = source)) +
geom_density(alpha = 0.1) +
geom_hline(yintercept = 0) +
theme_bw() +
theme(panel.grid = element_blank()) +
labs(x = "Ideology", y = "Density")
ggsave(here("paper/figures/article-ideology-hist.png"), width = 6, height = 4)
articles %>%
filter(!is.na(pred.ideo.q)) %>%
group_by(author_name) %>%
summarize(pred.ideo = first(pred.ideo), pred.ideo.q = first(pred.ideo.q)) %>%
ggplot(aes(x = pred.ideo, fill = pred.ideo.q)) +
geom_histogram() +
geom_hline(yintercept = 0) +
theme_bw() +
theme(panel.grid = element_blank()) +
labs(x = "Predicted Ideology", y = "Count", fill = "Quartile")
ggsave(here("paper/figures/journo-pred-ideology-hist.png"), width = 6, height = 4)
articles %>% ggplot(aes(x = pred.ideo, y = ideo.mean.i, color = pred.ideo.q)) +
geom_point()
sources %>%
mutate(
category = factor(category, levels = c(
"Academic",
"Advocacy",
"Business",
"Bureaucrat",
"Politician",
"International",
"Media",
"Other"
))
) %>%
group_by(category, source) %>%
summarize(n = n()) %>%
group_by(source) %>%
mutate(prop = n / sum(n)) %>%
ungroup() %>%
ggplot(
aes(x = reorder(source, n), fill = fct_rev(category), y = prop)
) +
geom_bar(stat = "identity") +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), legend.position = "bottom") +
labs(x = "", y = "Prop", fill = "Category") +
scale_fill_brewer(palette = "Set2", direction = -1) +
coord_flip() +
guides(fill = guide_legend(reverse = TRUE))
ggsave(here("paper/figures/source-dist.png"), width = 8, height = 6)
sources.s2 <- sources %>%
mutate(category.slant2 = case_when(
category.slant %in% c("Bureaucrat", "International", "Politician") ~ "Other",
category.slant %in% c("Academic", "Media", "Advocacy") ~ "Other",
TRUE ~ category.slant
)) %>%
mutate(
category.slant2 = factor(category.slant2, levels = c(
"Environmental",
"Business",
"Fossil Fuel",
"Democrat",
"Republican",
"Other"
))
)
sources.s2 %>%
group_by(category.slant2, source) %>%
summarize(n = n()) %>%
group_by(source) %>%
mutate(prop = n / sum(n)) %>%
ungroup() %>%
ggplot(
aes(x = reorder(source, n), fill = fct_rev(category.slant2), y = prop)
) +
geom_bar(stat = "identity") +
scale_fill_manual(values = c(
"Environmental" = "#91bfdb",
"Democrat" = "#4575b4",
"Republican" = "#d73027",
"Fossil Fuel" = "#fc8d59",
"Business" = "#fee090",
"Other" = "#ffffbf"
)) +
theme_bw() +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), legend.position = "bottom") +
labs(x = "", y = "Prop", fill = "Category") +
coord_flip() +
guides(fill = guide_legend(reverse = TRUE))
ggsave(here("paper/figures/source-dist-2.png"), width = 8, height = 6)
sources.s2$category.slant2 %>%
table() %>%
prop.table()
## .
## Environmental Business Fossil Fuel Democrat Republican
## 0.15367831 0.08445013 0.04650502 0.14469233 0.05518114
## Other
## 0.51549307
sources.s2 %>%
group_by(category.slant2) %>%
summarize(cfscore = mean(cfscore, na.rm = T))
## # A tibble: 6 × 2
## category.slant2 cfscore
## <fct> <dbl>
## 1 Environmental -0.821
## 2 Business 0.105
## 3 Fossil Fuel 0.347
## 4 Democrat -0.573
## 5 Republican 0.895
## 6 Other -0.0914
models <- list(
feols(ideo.mean.i ~ elite_undergrad_ivyplus, data = articles),
feols(ideo.mean.i ~ edu.has_postgrad, data = articles),
feols(ideo.mean.i ~ field.journo, data = articles),
feols(ideo.mean.i ~ age_est > 40, data = articles),
feols(ideo.mean.i ~ gender, data = articles),
feols(ideo.mean.i ~ !race.nonwhite, data = articles),
feols(n_ff / (n_ff + n_env) ~ elite_undergrad_ivyplus, data = articles),
feols(n_ff / (n_ff + n_env) ~ edu.has_postgrad, data = articles),
feols(n_ff / (n_ff + n_env) ~ field.journo, data = articles),
feols(n_ff / (n_ff + n_env) ~ age_est > 40, data = articles),
feols(n_ff / (n_ff + n_env) ~ gender, data = articles),
feols(n_ff / (n_ff + n_env) ~ !race.nonwhite, data = articles),
feols(n_rep / (n_rep + n_dem) ~ elite_undergrad_ivyplus, data = articles),
feols(n_rep / (n_rep + n_dem) ~ edu.has_postgrad, data = articles),
feols(n_rep / (n_rep + n_dem) ~ field.journo, data = articles),
feols(n_rep / (n_rep + n_dem) ~ age_est > 40, data = articles),
feols(n_rep / (n_rep + n_dem) ~ gender, data = articles),
feols(n_rep / (n_rep + n_dem) ~ !race.nonwhite, data = articles),
feols(ideo.mean ~ elite_undergrad_ivyplus, data = articles),
feols(ideo.mean ~ edu.has_postgrad, data = articles),
feols(ideo.mean ~ field.journo, data = articles),
feols(ideo.mean ~ age_est > 40, data = articles),
feols(ideo.mean ~ gender, data = articles),
feols(ideo.mean ~ !race.nonwhite, data = articles)
)
res <- lapply(models, \(m) {
beta <- m$coeftable[2, 1]
sig <- m$coeftable[2, 4] < 0.05
xname <- rownames(m$coeftable)[[2]]
yname <- as.character(m$fml)[2]
list(
"beta" = beta,
"sig" = sig,
"x" = xname,
"y" = yname
)
}) %>% bind_rows()
res %>%
mutate(
beta = round(beta, 2),
fc = case_when(
sig & beta > 0 ~ "coral",
sig & beta < 0 ~ "cyan",
TRUE ~ "grey"
)
) %>%
ggplot(aes(x = y, y = x, fill = fc, label = beta)) +
geom_tile() +
geom_text(size = 8) +
scale_fill_manual(values = c("coral", "cyan3", "grey"))
all.sources$cfscore_std <- all.sources$cfscore - mean(all.sources$cfscore, na.rm = T)
models <-
list(
lm(n_env + n_ff ~ policy_label_gpt, data = all.articles),
lm(n_rep + n_dem ~ policy_label_gpt, data = all.articles)
)
stargazer(models, type = "html")
| Dependent variable: | ||
| n_env + n_ff | n_rep + n_dem | |
| (1) | (2) | |
| policy_label_gpt | 0.491*** | 0.825*** |
| (0.015) | (0.016) | |
| Constant | 0.689*** | 0.416*** |
| (0.010) | (0.011) | |
| Observations | 28,121 | 28,121 |
| R2 | 0.036 | 0.083 |
| Adjusted R2 | 0.036 | 0.083 |
| Residual Std. Error (df = 28119) | 1.261 | 1.363 |
| F Statistic (df = 1; 28119) | 1,063.074*** | 2,561.112*** |
| Note: | p<0.1; p<0.05; p<0.01 | |
stargazer(models)
% Table created by stargazer v.5.2.3 by Marek Hlavac, Social Policy Institute. E-mail: marek.hlavac at gmail.com
% Date and time: Sat, Feb 08, 2025 - 16:19:53
\begin{table}[!htbp] \centering
\caption{}
\label{}
\begin{tabular}{@{\extracolsep{5pt}}lcc}
\\[-1.8ex]\hline
\hline \\[-1.8ex]
& \multicolumn{2}{c}{\textit{Dependent variable:}} \\
\cline{2-3}
\\[-1.8ex] & n\_env + n\_ff & n\_rep + n\_dem \\
\\[-1.8ex] & (1) & (2)\\
\hline \\[-1.8ex]
policy\_label\_gpt & 0.491$^{***}$ & 0.825$^{***}$ \\
& (0.015) & (0.016) \\
& & \\
Constant & 0.689$^{***}$ & 0.416$^{***}$ \\
& (0.010) & (0.011) \\
& & \\
\hline \\[-1.8ex]
Observations & 28,121 & 28,121 \\
R$^{2}$ & 0.036 & 0.083 \\
Adjusted R$^{2}$ & 0.036 & 0.083 \\
Residual Std. Error (df = 28119) & 1.261 & 1.363 \\
F Statistic (df = 1; 28119) & 1,063.074$^{***}$ & 2,561.112$^{***}$ \\
\hline
\hline \\[-1.8ex]
\textit{Note:} & \multicolumn{2}{r}{$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01} \\
\end{tabular}
\end{table}
models <- make_models(all.articles, c("ideo.mean.i", "balance_all"), j_covars)
models2 <- make_models(articles, c("ideo.mean", "-prop_topic_environment"), j_covars)
etable(models2)
## model 1 model 2
## Dependent Var.: ideo.mean -prop_topic_environment
##
## elite_undergrad_ivyplusTRUE 0.0970*** (0.0290) 0.0813*** (0.0154)
## edu.has_postgradTRUE -0.0307 (0.0248) -0.0162 (0.0127)
## field.journoTRUE 0.0340 (0.0328) 0.0664*** (0.0174)
## age_est_2017>30TRUE 0.0555* (0.0250) 0.0129 (0.0189)
## gendermale -0.0083 (0.0231) -0.0005 (0.0132)
## race.nonwhiteTRUE -0.0637. (0.0384) -0.0634** (0.0210)
## Fixed-Effects: ------------------ -----------------------
## year Yes Yes
## source Yes Yes
## ___________________________ __________________ _______________________
## S.E.: Clustered by: author_name by: author_name
## Observations 7,051 9,349
## R2 0.05908 0.08660
## Within R2 0.00800 0.04112
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
etable(c(models, models2))
## model 1 model 2
## Dependent Var.: ideo.mean.i balance_all
##
## elite_undergrad_ivyplusTRUE 0.0526** (0.0165) 0.1722*** (0.0287)
## edu.has_postgradTRUE -0.0101 (0.0159) -0.0326 (0.0275)
## field.journoTRUE 0.0444* (0.0183) 0.0998*** (0.0251)
## age_est_2017>30TRUE 0.0417** (0.0146) 0.0503. (0.0275)
## gendermale -0.0032 (0.0148) -0.0019 (0.0255)
## race.nonwhiteTRUE -0.0501* (0.0223) -0.0666. (0.0340)
## Fixed-Effects: ----------------- ------------------
## year Yes Yes
## source Yes Yes
## ___________________________ _________________ __________________
## S.E.: Clustered by: author_name by: author_name
## Observations 15,008 13,699
## R2 0.04795 0.05041
## Within R2 0.00654 0.03303
##
## model 3 model 4
## Dependent Var.: ideo.mean -prop_topic_environment
##
## elite_undergrad_ivyplusTRUE 0.0970*** (0.0290) 0.0813*** (0.0154)
## edu.has_postgradTRUE -0.0307 (0.0248) -0.0162 (0.0127)
## field.journoTRUE 0.0340 (0.0328) 0.0664*** (0.0174)
## age_est_2017>30TRUE 0.0555* (0.0250) 0.0129 (0.0189)
## gendermale -0.0083 (0.0231) -0.0005 (0.0132)
## race.nonwhiteTRUE -0.0637. (0.0384) -0.0634** (0.0210)
## Fixed-Effects: ------------------ -----------------------
## year Yes Yes
## source Yes Yes
## ___________________________ __________________ _______________________
## S.E.: Clustered by: author_name by: author_name
## Observations 7,051 9,349
## R2 0.05908 0.08660
## Within R2 0.00800 0.04112
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
did <- make_did(all.articles)
pdf(here("paper/figures/did-event-study-alt.pdf"))
did$event_study_model %>%
coefplot(main = "", xlab = "")
dev.off()
## quartz_off_screen
## 2
etable(c(models, models2), tex = T, digits = 2)
\begingroup
\centering
\begin{tabular}{lcccc}
\tabularnewline \midrule \midrule
Dependent Variables: & ideo.mean.i & balance\_all & ideo.mean & -prop\_topic\_environment\\
Model: & (1) & (2) & (3) & (4)\\
\midrule
\emph{Variables}\\
elite\_undergrad\_ivyplusTRUE & 0.05$^{***}$ & 0.17$^{***}$ & 0.10$^{***}$ & 0.08$^{***}$\\
& (0.02) & (0.03) & (0.03) & (0.01)\\
edu.has\_postgradTRUE & -0.01 & -0.03 & -0.03 & -0.02\\
& (0.02) & (0.03) & (0.03) & (0.01)\\
field.journoTRUE & 0.04$^{**}$ & 0.10$^{***}$ & 0.03 & 0.07$^{***}$\\
& (0.02) & (0.03) & (0.03) & (0.02)\\
age\_est\_2017>30TRUE & 0.04$^{***}$ & 0.05$^{*}$ & 0.06$^{**}$ & 0.01\\
& (0.01) & (0.03) & (0.03) & (0.02)\\
gendermale & -0.003 & -0.002 & -0.008 & -0.0005\\
& (0.01) & (0.03) & (0.02) & (0.01)\\
race.nonwhiteTRUE & -0.05$^{**}$ & -0.07$^{*}$ & -0.06$^{*}$ & -0.06$^{***}$\\
& (0.02) & (0.03) & (0.04) & (0.02)\\
\midrule
\emph{Fixed-effects}\\
year & Yes & Yes & Yes & Yes\\
source & Yes & Yes & Yes & Yes\\
\midrule
\emph{Fit statistics}\\
Observations & 15,008 & 13,699 & 7,051 & 9,349\\
R$^2$ & 0.04795 & 0.05041 & 0.05908 & 0.08660\\
Within R$^2$ & 0.00654 & 0.03303 & 0.00800 & 0.04112\\
\midrule \midrule
\multicolumn{5}{l}{\emph{Clustered (author\_name) standard-errors in parentheses}}\\
\multicolumn{5}{l}{\emph{Signif. Codes: ***: 0.01, **: 0.05, *: 0.1}}\\
\end{tabular}
\par\endgroup
models3 <- make_models(articles, c(
"ideo.mean.pols",
"ideo.mean.i.orgs",
"n_ff",
"n_env",
"n_rep",
"n_dem",
"n_right_dime",
"-n_left_dime",
"(n_ff > 0)",
"(n_env > 0)",
"(n_rep > 0)",
"(n_dem > 0)",
"prop_ff",
"prop_env",
"prop_rep",
"prop_dem"
), j_covars)
etable(models3)
## model 1 model 2
## Dependent Var.: ideo.mean.pols ideo.mean.i.orgs
##
## elite_undergrad_ivyplusTRUE 0.0507 (0.0378) 0.1117** (0.0403)
## edu.has_postgradTRUE -0.0467 (0.0321) -0.0325 (0.0373)
## field.journoTRUE -0.0035 (0.0429) -0.0030 (0.0469)
## age_est_2017>30TRUE 0.1140*** (0.0296) 0.0346 (0.0394)
## gendermale -0.0379 (0.0301) 0.0194 (0.0335)
## race.nonwhiteTRUE -0.0992* (0.0473) -0.0416 (0.0684)
## Fixed-Effects: ------------------ -----------------
## year Yes Yes
## source Yes Yes
## ___________________________ __________________ _________________
## S.E.: Clustered by: author_name by: author_name
## Observations 4,492 4,956
## R2 0.09378 0.05159
## Within R2 0.00989 0.00733
##
## model 3 model 4
## Dependent Var.: n_ff n_env
##
## elite_undergrad_ivyplusTRUE 0.0953 (0.0650) 0.0776 (0.0775)
## edu.has_postgradTRUE -0.0184 (0.0616) 0.0741 (0.0607)
## field.journoTRUE 0.0123 (0.0726) -0.1549* (0.0646)
## age_est_2017>30TRUE 0.0274 (0.0580) -0.0116 (0.0876)
## gendermale 0.0124 (0.0561) 0.0489 (0.0626)
## race.nonwhiteTRUE -0.0347 (0.0760) 0.2569** (0.0828)
## Fixed-Effects: ---------------- -----------------
## year Yes Yes
## source Yes Yes
## ___________________________ ________________ _________________
## S.E.: Clustered by: author_name by: author_name
## Observations 9,349 9,349
## R2 0.02671 0.04064
## Within R2 0.00396 0.01189
##
## model 5 model 6
## Dependent Var.: n_rep n_dem
##
## elite_undergrad_ivyplusTRUE 0.1879*** (0.0534) 0.3820*** (0.1132)
## edu.has_postgradTRUE -0.0834 (0.0526) -0.1297 (0.1144)
## field.journoTRUE 0.1483** (0.0461) 0.4698*** (0.1127)
## age_est_2017>30TRUE 0.0516 (0.0568) -0.1656 (0.1689)
## gendermale -0.0727 (0.0451) -0.0575 (0.0980)
## race.nonwhiteTRUE -0.0998* (0.0502) -0.1762. (0.1039)
## Fixed-Effects: ------------------ ------------------
## year Yes Yes
## source Yes Yes
## ___________________________ __________________ __________________
## S.E.: Clustered by: author_name by: author_name
## Observations 9,349 9,349
## R2 0.06115 0.09073
## Within R2 0.02433 0.04155
##
## model 7 model 8
## Dependent Var.: n_right_dime -n_left_dime
##
## elite_undergrad_ivyplusTRUE 0.2522*** (0.0552) -0.2356** (0.0857)
## edu.has_postgradTRUE -0.1339* (0.0540) 0.0943 (0.0823)
## field.journoTRUE 0.1110* (0.0534) -0.2506** (0.0917)
## age_est_2017>30TRUE 0.0394 (0.0582) 0.1425 (0.1169)
## gendermale -0.0340 (0.0476) -0.0304 (0.0822)
## race.nonwhiteTRUE -0.1279. (0.0716) 0.0582 (0.0769)
## Fixed-Effects: ------------------ ------------------
## year Yes Yes
## source Yes Yes
## ___________________________ __________________ __________________
## S.E.: Clustered by: author_name by: author_name
## Observations 9,349 9,349
## R2 0.04070 0.06299
## Within R2 0.02309 0.01600
##
## model 9 model 10
## Dependent Var.: n_ff>0 n_env>0
##
## elite_undergrad_ivyplusTRUE 0.0677* (0.0307) 0.0416 (0.0270)
## edu.has_postgradTRUE 0.0007 (0.0282) 0.0199 (0.0238)
## field.journoTRUE 0.0296 (0.0330) -0.0787** (0.0277)
## age_est_2017>30TRUE 0.0187 (0.0284) 0.0265 (0.0328)
## gendermale -0.0082 (0.0269) 0.0033 (0.0236)
## race.nonwhiteTRUE -0.0283 (0.0343) 0.0911** (0.0316)
## Fixed-Effects: ---------------- ------------------
## year Yes Yes
## source Yes Yes
## ___________________________ ________________ __________________
## S.E.: Clustered by: author_name by: author_name
## Observations 9,349 9,349
## R2 0.03022 0.04237
## Within R2 0.00717 0.01269
##
## model 11 model 12
## Dependent Var.: n_rep>0 n_dem>0
##
## elite_undergrad_ivyplusTRUE 0.1159*** (0.0322) 0.1925*** (0.0418)
## edu.has_postgradTRUE -0.0504 (0.0319) -0.0053 (0.0332)
## field.journoTRUE 0.0850** (0.0273) 0.1736*** (0.0339)
## age_est_2017>30TRUE 0.0328 (0.0327) 0.0042 (0.0408)
## gendermale -0.0494. (0.0281) -0.0283 (0.0333)
## race.nonwhiteTRUE -0.0617* (0.0304) -0.0377 (0.0398)
## Fixed-Effects: ------------------ ------------------
## year Yes Yes
## source Yes Yes
## ___________________________ __________________ __________________
## S.E.: Clustered by: author_name by: author_name
## Observations 9,349 9,349
## R2 0.06748 0.07851
## Within R2 0.02975 0.03916
##
## model 13 model 14
## Dependent Var.: prop_ff prop_env
##
## elite_undergrad_ivyplusTRUE 0.0134 (0.0097) 0.0019 (0.0100)
## edu.has_postgradTRUE -0.0008 (0.0096) 0.0107 (0.0091)
## field.journoTRUE 0.0048 (0.0126) -0.0243* (0.0105)
## age_est_2017>30TRUE 0.0078 (0.0081) -0.0022 (0.0120)
## gendermale 0.0010 (0.0088) 0.0079 (0.0089)
## race.nonwhiteTRUE -0.0070 (0.0108) 0.0452** (0.0143)
## Fixed-Effects: ---------------- -----------------
## year Yes Yes
## source Yes Yes
## ___________________________ ________________ _________________
## S.E.: Clustered by: author_name by: author_name
## Observations 9,349 9,349
## R2 0.03902 0.03227
## Within R2 0.00338 0.01113
##
## model 15 model 16
## Dependent Var.: prop_rep prop_dem
##
## elite_undergrad_ivyplusTRUE 0.0280** (0.0085) 0.0541* (0.0215)
## edu.has_postgradTRUE -0.0130 (0.0086) -0.0081 (0.0196)
## field.journoTRUE 0.0281*** (0.0079) 0.0880*** (0.0210)
## age_est_2017>30TRUE 0.0080 (0.0085) -0.0128 (0.0274)
## gendermale -0.0127. (0.0074) -0.0123 (0.0169)
## race.nonwhiteTRUE -0.0171* (0.0082) -0.0305 (0.0205)
## Fixed-Effects: ------------------ ------------------
## year Yes Yes
## source Yes Yes
## ___________________________ __________________ __________________
## S.E.: Clustered by: author_name by: author_name
## Observations 9,349 9,349
## R2 0.06281 0.07175
## Within R2 0.02249 0.03117
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
etable(models3, tex = T)
\begingroup
\centering
\begin{tabular}{lcccccccccccccccc}
\tabularnewline \midrule \midrule
Dependent Variables: & ideo.mean.pols & ideo.mean.i.orgs & n\_ff & n\_env & n\_rep & n\_dem & n\_right\_dime & -n\_left\_dime & n\_ff>0 & n\_env>0 & n\_rep>0 & n\_dem>0 & prop\_ff & prop\_env & prop\_rep & prop\_dem\\
Model: & (1) & (2) & (3) & (4) & (5) & (6) & (7) & (8) & (9) & (10) & (11) & (12) & (13) & (14) & (15) & (16)\\
\midrule
\emph{Variables}\\
elite\_undergrad\_ivyplusTRUE & 0.0507 & 0.1117$^{***}$ & 0.0953 & 0.0776 & 0.1879$^{***}$ & 0.3820$^{***}$ & 0.2522$^{***}$ & -0.2356$^{***}$ & 0.0677$^{**}$ & 0.0416 & 0.1159$^{***}$ & 0.1925$^{***}$ & 0.0134 & 0.0019 & 0.0280$^{***}$ & 0.0541$^{**}$\\
& (0.0378) & (0.0403) & (0.0650) & (0.0775) & (0.0534) & (0.1132) & (0.0552) & (0.0857) & (0.0307) & (0.0270) & (0.0322) & (0.0418) & (0.0097) & (0.0100) & (0.0085) & (0.0215)\\
edu.has\_postgradTRUE & -0.0467 & -0.0325 & -0.0184 & 0.0741 & -0.0834 & -0.1297 & -0.1339$^{**}$ & 0.0943 & 0.0007 & 0.0199 & -0.0504 & -0.0053 & -0.0008 & 0.0107 & -0.0130 & -0.0081\\
& (0.0321) & (0.0373) & (0.0616) & (0.0607) & (0.0526) & (0.1144) & (0.0540) & (0.0823) & (0.0282) & (0.0238) & (0.0319) & (0.0332) & (0.0096) & (0.0091) & (0.0086) & (0.0196)\\
field.journoTRUE & -0.0035 & -0.0030 & 0.0123 & -0.1549$^{**}$ & 0.1483$^{***}$ & 0.4698$^{***}$ & 0.1110$^{**}$ & -0.2506$^{***}$ & 0.0296 & -0.0787$^{***}$ & 0.0850$^{***}$ & 0.1736$^{***}$ & 0.0048 & -0.0243$^{**}$ & 0.0281$^{***}$ & 0.0880$^{***}$\\
& (0.0429) & (0.0469) & (0.0726) & (0.0646) & (0.0461) & (0.1127) & (0.0534) & (0.0917) & (0.0330) & (0.0277) & (0.0273) & (0.0339) & (0.0126) & (0.0105) & (0.0079) & (0.0210)\\
age\_est\_2017>30TRUE & 0.1140$^{***}$ & 0.0346 & 0.0274 & -0.0116 & 0.0516 & -0.1656 & 0.0394 & 0.1425 & 0.0187 & 0.0265 & 0.0328 & 0.0042 & 0.0078 & -0.0022 & 0.0080 & -0.0128\\
& (0.0296) & (0.0394) & (0.0580) & (0.0876) & (0.0568) & (0.1689) & (0.0582) & (0.1169) & (0.0284) & (0.0328) & (0.0327) & (0.0408) & (0.0081) & (0.0120) & (0.0085) & (0.0274)\\
gendermale & -0.0379 & 0.0194 & 0.0124 & 0.0489 & -0.0727 & -0.0575 & -0.0340 & -0.0304 & -0.0082 & 0.0033 & -0.0494$^{*}$ & -0.0283 & 0.0010 & 0.0079 & -0.0127$^{*}$ & -0.0123\\
& (0.0301) & (0.0335) & (0.0561) & (0.0626) & (0.0451) & (0.0980) & (0.0476) & (0.0822) & (0.0269) & (0.0236) & (0.0281) & (0.0333) & (0.0088) & (0.0089) & (0.0074) & (0.0169)\\
race.nonwhiteTRUE & -0.0992$^{**}$ & -0.0416 & -0.0347 & 0.2569$^{***}$ & -0.0998$^{**}$ & -0.1762$^{*}$ & -0.1279$^{*}$ & 0.0582 & -0.0283 & 0.0911$^{***}$ & -0.0617$^{**}$ & -0.0377 & -0.0070 & 0.0452$^{***}$ & -0.0171$^{**}$ & -0.0305\\
& (0.0473) & (0.0684) & (0.0760) & (0.0828) & (0.0502) & (0.1039) & (0.0716) & (0.0769) & (0.0343) & (0.0316) & (0.0304) & (0.0398) & (0.0108) & (0.0143) & (0.0082) & (0.0205)\\
\midrule
\emph{Fixed-effects}\\
year & Yes & Yes & Yes & Yes & Yes & Yes & Yes & Yes & Yes & Yes & Yes & Yes & Yes & Yes & Yes & Yes\\
source & Yes & Yes & Yes & Yes & Yes & Yes & Yes & Yes & Yes & Yes & Yes & Yes & Yes & Yes & Yes & Yes\\
\midrule
\emph{Fit statistics}\\
Observations & 4,492 & 4,956 & 9,349 & 9,349 & 9,349 & 9,349 & 9,349 & 9,349 & 9,349 & 9,349 & 9,349 & 9,349 & 9,349 & 9,349 & 9,349 & 9,349\\
R$^2$ & 0.09378 & 0.05159 & 0.02671 & 0.04064 & 0.06115 & 0.09073 & 0.04070 & 0.06299 & 0.03022 & 0.04237 & 0.06748 & 0.07851 & 0.03902 & 0.03227 & 0.06281 & 0.07175\\
Within R$^2$ & 0.00989 & 0.00733 & 0.00396 & 0.01189 & 0.02433 & 0.04155 & 0.02309 & 0.01600 & 0.00717 & 0.01269 & 0.02975 & 0.03916 & 0.00338 & 0.01113 & 0.02249 & 0.03117\\
\midrule \midrule
\multicolumn{17}{l}{\emph{Clustered (author\_name) standard-errors in parentheses}}\\
\multicolumn{17}{l}{\emph{Signif. Codes: ***: 0.01, **: 0.05, *: 0.1}}\\
\end{tabular}
\par\endgroup
articles %>%
ggplot(aes(x = date, y = ideo.mean.i, color = elite_undergrad_ivyplus, group = elite_undergrad_ivyplus)) +
geom_smooth(se = F, method = "loess") +
theme_bw() +
theme(panel.grid.minor = element_blank()) +
labs(title = "Ideology of sources over time by elite education")
nyt.articles <- articles %>% filter(source == "New York Times")
nyt.articles %>% ggplot(aes(x = date, y = n_comments, color = ideo.mean.i)) +
geom_point(alpha = 0.3, size = 10) +
scale_color_gradient(low = "blue", high = "red")
nyt.articles %>% ggplot(aes(x = date, y = n_comments, color = balance_all)) +
geom_point()
hist(nyt.articles$n_comments)
sd(nyt.articles$n_comments, na.rm = T)
## [1] 512.5869
table(nyt.articles$n_comments %>% is.na())
##
## FALSE TRUE
## 1363 3308
nyt.articles %>% ggplot(aes(x = ideo.mean.i, y = n_comments)) +
geom_point() +
geom_smooth() +
theme_bw()
ggsave(here("paper/figures/nyt-comments-slant-scatter.png"), width = 6, height = 5)
summary(feols(n_comments ~ ideo.mean.i, data = nyt.articles))
## OLS estimation, Dep. Var.: n_comments
## Observations: 1,301
## Standard-errors: IID
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 388.4150 15.8482 24.50848 < 2.2e-16 ***
## ideo.mean.i 92.9292 31.2870 2.97022 0.0030305 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 518.7 Adj. R2: 0.005981
m <- feols(n_comments ~ elite_undergrad_ivyplus + age_est_2017 + race.nonwhite + gender + field.journo + edu.has_postgrad, data = nyt.articles)
etable(m, tex = T)
## \begingroup
## \centering
## \begin{tabular}{lc}
## \tabularnewline \midrule \midrule
## Dependent Variable: & n\_comments\\
## Model: & (1)\\
## \midrule
## \emph{Variables}\\
## Constant & 391.9$^{***}$\\
## & (99.39)\\
## elite\_undergrad\_ivyplusTRUE & 81.80$^{*}$\\
## & (48.56)\\
## age\_est\_2017 & 0.9671\\
## & (2.344)\\
## race.nonwhiteTRUE & -55.58\\
## & (62.47)\\
## gendermale & -25.65\\
## & (45.79)\\
## field.journoTRUE & 174.2$^{***}$\\
## & (54.27)\\
## edu.has\_postgradTRUE & -144.1$^{***}$\\
## & (38.99)\\
## \midrule
## \emph{Fit statistics}\\
## Observations & 978\\
## R$^2$ & 0.03599\\
## Adjusted R$^2$ & 0.03003\\
## \midrule \midrule
## \multicolumn{2}{l}{\emph{IID standard-errors in parentheses}}\\
## \multicolumn{2}{l}{\emph{Signif. Codes: ***: 0.01, **: 0.05, *: 0.1}}\\
## \end{tabular}
## \par\endgroup
summary(feols(n_comments ~ n + n_unique_source_category | year, data = nyt.articles)) summary(feols(log(n_comments) ~ n + n_unique_source_category | year, data = nyt.articles))
summary(feols(log(n_comments) ~ elite_undergrad_ivyplus + age_est_2017 + race.nonwhite + gender + field.journo + edu.has_postgrad, data = nyt.articles))
summary(feols(n_comments ~ balance_all, data = nyt.articles)) summary(feols(n_comments ~ ideo.mean.i + balance_all, data = nyt.articles)) summary(feols(log(n_comments) ~ ideo.mean.i + balance_all, data = nyt.articles))
summary(feols(n_comments ~ ideo.mean.i + balance_all + n + n_unique_source_category, data = nyt.articles)) summary(feols(log(n_comments) ~ ideo.mean.i + balance_all + n + n_unique_source_category, data = nyt.articles)) summary(feols(n_comments ~ ideo.mean.i + balance_all + post2017, data = nyt.articles))
summary(feols(n_comments ~ (ideo.mean.i > 0) * post2017, data = nyt.articles)) summary(feols(log(n_comments) ~ (ideo.mean.i > 0) * post2017, data = nyt.articles))
nyt.articles %>% ggplot(aes(x = date, y = n_comments, color = balance_all)) + geom_smooth(se = F, method = “lm”) + geom_point() nyt.articles %>% ggplot(aes(x = date, y = n_comments, color = ideo.mean.i > 0)) + geom_smooth(se = F, method = “lm”) + geom_point()
etable( list( feols(n_comments ~ elite_undergrad_ivyplus + age_est_2017 + race.nonwhite + gender + field.journo + edu.has_postgrad, data = nyt.articles), # feols(n_comments ~ elite_undergrad_ivyplus + age_est_2017 + race.nonwhite + gender + field.journo + edu.has_postgrad | year, data = nyt.articles), feols(n_comments ~ ideo.mean.i + balance_all + n + n_unique_source_category, data = nyt.articles) # feols(n_comments ~ ideo.mean.i + balance_all + n + n_unique_source_category | year, data = nyt.articles) ) ) ```